

# Load the cleaned Stata file.
# NOTE(review): `districts_dat` is not referenced in the visible part of this
# script - confirm it is still needed further down or elsewhere.
districts_dat <- read_dta(file = "data/cleaned/ZA2472.dta")

# Divide data into pre/post 1980 ------------------------------------------
# Votes and shapefiles are matched by name for the elections before 1980 and
# by AGS (geoids) from 1980 onwards, so both inputs are cut at that year.

# Election results
btw_53_76 <- filter(btw_kreis_clean, year < 1980)
btw_80_17 <- filter(btw_kreis_clean, year >= 1980)

# County geometries
shp_53_76 <- filter(kreis_geo_dat_clean, year < 1980)
shp_80_17 <- filter(kreis_geo_dat_clean, year >= 1980)

# Harmonise names prior to 1980 --------------------------------------------

# Names in Bundeswahlleiter dataset use different spelling conventions
# and/or newer/older denominations. To match them with the geo data they had
# to be manually crossreferenced.

# Directory searched for the name-harmonisation workbooks.
data_path <- "data/raw/harm_kreis_names/"
# `pattern` is a regular expression, not a shell glob: anchor on the literal
# ".xlsx" extension so that only files ending in ".xlsx" are listed.
files <- dir(data_path, pattern = "\\.xlsx$")

# Read one name-harmonisation workbook and tag its rows with the election year.
#
# Args:
#   file: File name (relative to `data_path`) of the workbook. It has to start
#         with the two-digit election year; "19" is prepended to build the
#         four-digit year.
#
# Returns:
#   The sheet read by read_excel() with a numeric `year` column, `GEN` and
#   `VOTE` renamed to `gen` and `name`, and the first column as well as
#   `check` removed.
read_harm_names_53_76 <- function(file) {
    file_location <- file.path(data_path, file)

    # Fail early with a readable message instead of silently producing the
    # year NA ("19NA") for a file that does not follow the naming scheme.
    year_2d <- str_extract(as.character(file), "^\\d{2}")
    if (anyNA(year_2d)) {
        stop("File name must start with a two-digit year: ", file,
             call. = FALSE)
    }
    year <- paste0("19", year_2d)

    # NOTE(review): if a sheet carried its own `year` column, data masking
    # would use that column here instead of the year taken from the file
    # name - confirm the sheets have none.
    # The pipeline is the last expression, so the result is returned visibly.
    read_excel(path = file_location) %>%
        mutate(year = as.numeric(year)) %>%
        rename(gen = GEN, name = VOTE) %>%
        select(-1, -check)
}

# Stack the crosswalk tables of all workbooks into one data frame.
harm_kreis_names_53_76 <- files %>%
    map_dfr(read_harm_names_53_76)

# Harmonise the names: attach the crosswalk to the pre-1980 shapes.
# No `by` is given, so the join runs on every column name both tables share.
kreis_geo_dat_harm_53_76 <- shp_53_76 %>%
    left_join(harm_kreis_names_53_76)

# Combine shapes and vote data before 1980 (again joined on all shared
# column names).
btw_kreis_geo_vote_53_76 <- btw_53_76 %>%
    left_join(kreis_geo_dat_harm_53_76)

# Harmonise ags from 1980 on ----------------------------------------------

btw_80_17 <- btw_80_17 %>%
    mutate(
        # Prefix codes shorter than five characters with a "0".
        ags = if_else(nchar(ags) < 5, paste0(0, ags), ags),
        # Fold the codes "11100" and "11200" into "11000". The comparison is
        # exact, so a code that merely contains one of these digit sequences
        # is left untouched.
        ags = if_else(ags %in% c("11100", "11200"), "11000", ags),
        # Every name containing "Berlin" is collapsed to plain "Berlin".
        name = if_else(str_detect(name, "Berlin"), "Berlin", name)
    ) %>%
    # Rows that now share ags, name and year are added up. Only numeric
    # columns survive; `.groups = "drop"` returns an ungrouped tibble so no
    # grouping leaks into the joins below.
    group_by(ags, name, year) %>%
    summarise(across(where(is.numeric), sum), .groups = "drop")

# Give the shape data the same key column name as the vote data.
shp_80_17 <- shp_80_17 %>%
    rename(ags = ags_orig)

# No `by` is given: the join runs on every column name both tables share.
btw_kreis_geo_vote_80_17 <- left_join(btw_80_17, shp_80_17)


# Combine Merged DF -------------------------------------------------------

# Stack the pre- and post-1980 tables and build `name_id` (year pasted in
# front of the name); any grouping still attached is removed at the end.
btw_kreis_geo_complete <- btw_kreis_geo_vote_53_76 %>%
    bind_rows(btw_kreis_geo_vote_80_17) %>%
    mutate(name_id = paste0(year, name)) %>%
    ungroup()

# Project results to 2017 counties ----------------------------------------

# Three target layers built from the same data with different `ags_lim`
# values.
# NOTE(review): `ags_lim` presumably is an upper bound on the AGS codes kept
# by fit_into_hist_borders() - confirm against its definition.
kreise_West_GER_2017_wo_SL <- fit_into_hist_borders(
    df = btw_kreis_geo_complete,
    ags_lim = 10000
)
kreise_West_GER_2017 <- fit_into_hist_borders(
    df = btw_kreis_geo_complete,
    ags_lim = 10999
)
kreise_GER_2017 <- fit_into_hist_borders(
    df = btw_kreis_geo_complete,
    ags_lim = 17000
)

# Optional caching of the target layers (disabled):
# saveRDS(kreise_West_GER_2017_wo_SL, "data/cache/kreise_wo_SL_2017.rds")
# saveRDS(kreise_West_GER_2017, "data/cache/kreise_west_2017.rds")
# saveRDS(kreise_GER_2017, "data/cache/kreise_ger_2017.rds")
# 

# Define Extensive Vars -----------------------------------------------

# Columns handed to the areal routines as `extensive` / `varList`: four
# electorate columns followed by the `_zweit` column of each party.
varlist_ext <- c(
    "elig", "voters", "invalid", "valid",
    paste0(
        c("csu", "cdu", "fdp", "spd", "die_linke", "grune",
          "afd", "drp", "npd", "rep", "other"),
        "_zweit"
    )
)



# Interpolate the columns listed in `varlist_ext` from the features in `df`
# onto the features in `target` with aw_interpolate().
#
# Args:
#   df:     Source features, identified by their `name_id` column.
#   target: Target features, identified by their `ags_proj` column.
#
# Returns:
#   The result of aw_interpolate() called with output = "sf" and
#   weight = "sum".
aw_interpolate_kreise <- function(df, target) {
    aw_interpolate(
        target,
        tid = ags_proj,
        source = df,
        sid = "name_id",
        weight = "sum",
        output = "sf",
        extensive = varlist_ext
    )
}


# Run ar_validate() for one source/target pair on the columns listed in
# `varlist_ext`.
#
# Args:
#   df:     Source features.
#   target: Target features.
#
# Returns:
#   The result of ar_validate() called with verbose = TRUE.
ar_validate_kreise <- function(df, target) {
    ar_validate(
        target = target,
        source = df,
        varList = varlist_ext,
        # Spelled out: `T` is an ordinary variable that can be reassigned.
        verbose = TRUE
    )
}

# Interpolate every element of `df_list` onto its target layer and time the
# whole run with tic()/toc().
#
# The target is picked by list position:
#   element 1       -> kreise_West_GER_2017_wo_SL
#   elements 2-10   -> kreise_West_GER_2017
#   elements 11-18  -> kreise_GER_2017
#
# Args:
#   df_list: List of source data frames; positions 1 to 18 are used.
#
# Returns:
#   One list with the 18 interpolated layers, in input order.
project_to_2017 <- function(df_list) {
    tic("Total projection time")

    # Map one slice of the input list onto a single target layer.
    interpolate_onto <- function(sources, target) {
        map(sources, ~ aw_interpolate_kreise(.x, target), .progress = TRUE)
    }

    first_layer <- interpolate_onto(df_list[1], kreise_West_GER_2017_wo_SL)
    west_layers <- interpolate_onto(df_list[2:10], kreise_West_GER_2017)
    all_layers <- interpolate_onto(df_list[11:18], kreise_GER_2017)

    projected <- c(first_layer, west_layers, all_layers)
    toc()
    projected
}

# Validate every element of `df_list` against its target layer and time the
# whole run with tic()/toc().
#
# The target is picked by list position:
#   element 1       -> kreise_West_GER_2017_wo_SL
#   elements 2-10   -> kreise_West_GER_2017
#   elements 11-18  -> kreise_GER_2017
#
# Args:
#   df_list: List of source data frames; positions 1 to 18 are used.
#
# Returns:
#   One list with the 18 validation results, in input order.
validate_w_2017 <- function(df_list) {
    tic("Total validation time")

    # No `verbose` argument is passed through map(): the formula lambda would
    # swallow it without effect, and ar_validate_kreise() already requests
    # verbose output itself.
    v1 <- map(df_list[1], ~ ar_validate_kreise(.x, kreise_West_GER_2017_wo_SL), .progress = TRUE)
    v2 <- map(df_list[2:10], ~ ar_validate_kreise(.x, kreise_West_GER_2017), .progress = TRUE)
    v3 <- map(df_list[11:18], ~ ar_validate_kreise(.x, kreise_GER_2017), .progress = TRUE)

    result <- c(v1, v2, v3)
    toc()
    result
}

# Implement the projection

# One sf data frame per value of `year`, as returned by group_split().
list_by_year <- btw_kreis_geo_complete %>%
    st_sf() %>%
    group_by(year) %>%
    group_split()

validity_check <- validate_w_2017(list_by_year)  # Optional
list_projected_by_year <- project_to_2017(list_by_year)

# Label each projected layer with the year of the input it was built from.
# The labels are read from the data rather than typed by hand, so they cannot
# drift out of step with the elections that are actually present.
stopifnot(length(list_projected_by_year) == length(list_by_year))
names(list_projected_by_year) <- map_chr(
    list_by_year,
    ~ as.character(.x$year[[1]])
)

# Stack all layers; the list names end up in the character column `year`.
projected_by_year_raw <- list_projected_by_year %>%
    bind_rows(.id = "year") %>%
    as_tibble()


# Calculate Proportional voteshares ---------------------------------------



# Original county-level results -------------------------------------------

# Adds the combined CDU/CSU column and turnout, then a `<column>_sh`
# companion - calc_prop() of the column and `valid` - for every selected
# column.
btw_kreis_1953_2017 <- btw_kreis_geo_complete %>%
    mutate(
        cdu_csu_zweit = cdu_zweit + csu_zweit,
        turnout = voters / elig
    ) %>%
    mutate(
        across(
            c(afd_erst:spd_zweit, cdu_csu_zweit, -elig),
            ~ calc_prop(.x, valid),
            .names = "{.col}_sh"
        )
    )

btw_kreis_1953_2017 %>%
    write_rds("data/cleaned/btw_all_counties_1953_2017.Rds")


# Projected county-level results ------------------------------------------

# Same derived columns as for the original results, computed on the
# projected data: combined CDU/CSU column, turnout and one `<column>_sh`
# companion per selected column.
btw_projected_to_2017_clean <- projected_by_year_raw %>%
    mutate(
        cdu_csu_zweit = cdu_zweit + csu_zweit,
        turnout = voters / elig
    ) %>%
    mutate(
        across(
            c(afd_zweit:spd_zweit, cdu_csu_zweit, -elig),
            ~ calc_prop(.x, valid),
            .names = "{.col}_sh"
        )
    )

btw_projected_to_2017_clean %>%
    write_rds("data/cleaned/btw_all_counties_1953_2017_proj_to_2017.Rds")


# Sanity Check 1 - original results used in the comparison maps -----------

# Rows of one election, with `year` renamed to `year_orig`, labelled
# "Original" and converted to sf for plotting.
btw_orig_w_geo_2002 <- btw_kreis_1953_2017 %>%
    filter(year == "2002") %>%
    rename(year_orig = year) %>%
    mutate(type = "Original") %>%
    st_sf()

btw_orig_w_geo_1953 <- btw_kreis_1953_2017 %>%
    filter(year == "1953") %>%
    rename(year_orig = year) %>%
    mutate(type = "Original") %>%
    st_sf()

# Sanity Check 2 - projected results used in the comparison maps ----------

# Rows of one election from the projected data. `year` becomes `year_orig`
# and `year_proj` takes over the name `year`; the rows are labelled
# "Projection" and converted to sf for plotting.
btw_projected_to_2017_clean_w_geo_2002 <- btw_projected_to_2017_clean %>%
    filter(year == "2002") %>%
    rename(year_orig = year, year = year_proj) %>%
    mutate(type = "Projection") %>%
    st_sf()

btw_projected_to_2017_clean_w_geo_1953 <- btw_projected_to_2017_clean %>%
    filter(year == "1953") %>%
    rename(year_orig = year, year = year_proj) %>%
    mutate(type = "Projection") %>%
    st_sf()



# Figure B5: `cdu_csu_zweit_sh` for the 1953 election, one facet for the
# projected layer (labelled 2017) and one for the original layer (labelled
# 1953).
ggplot() +
    geom_sf(
        data = mutate(btw_projected_to_2017_clean_w_geo_1953, year = "2017"),
        mapping = aes(fill = cdu_csu_zweit_sh)
    ) +
    geom_sf(
        data = mutate(btw_orig_w_geo_1953, year = "1953"),
        mapping = aes(fill = cdu_csu_zweit_sh)
    ) +
    scale_fill_distiller(
        palette = "Blues",
        direction = 1,
        guide = "coloursteps"
    ) +
    theme_minimal() +
    labs(fill = "CDU/CSU") +
    facet_grid(~ type + year) +
    theme(
        legend.position = "bottom",
        legend.direction = "horizontal"
    )
# No plot object is passed, so ggsave() writes the most recent plot.
# NOTE(review): saved at ggsave()'s default dpi, whereas
# figure_b6_greens.png is written with dpi = 600 - confirm this is intended.
ggsave("output/figures/figure_b5_elections.png")

# Figure B6: `grune_zweit_sh` for the 2002 election, one facet for the
# projected layer (labelled 2017) and one for the original layer (labelled
# 2002).
ggplot() +
    geom_sf(
        data = mutate(btw_projected_to_2017_clean_w_geo_2002, year = "2017"),
        mapping = aes(fill = grune_zweit_sh)
    ) +
    geom_sf(
        data = mutate(btw_orig_w_geo_2002, year = "2002"),
        mapping = aes(fill = grune_zweit_sh)
    ) +
    scale_fill_distiller(
        palette = "Greens",
        direction = 1,
        guide = "coloursteps"
    ) +
    theme_minimal() +
    labs(fill = "B90/Grüne") +
    facet_grid(~ type + year) +
    theme(
        legend.position = "bottom",
        legend.direction = "horizontal"
    )

# No plot object is passed, so ggsave() writes the most recent plot.
ggsave("output/figures/figure_b6_greens.png", dpi = 600)


